######################################################################
####
#### Replication script for:
#### Thrall, Calvin. Public-Private Governance Initiatives
#### and Corporate Responses to Stakeholder Complaints. 
#### International Organization.
####
######################################################################

# Created using R version 4.0.2

################
##
### packages
##
################

require(tidyverse) # version 1.3.0
require(ggrepel) # version 0.8.2
require(stargazer) # version 5.2.2
require(margins) # version 0.3.23
require(prediction) # version 0.3.14
require(lme4) # version 1.1-23
require(stringr) # version 1.4.0
require(AER) # version 1.2-9
require(broom) # version 0.7.0
require(sandwich) # version 2.5-1
require(skimr) # version 2.1.2
require(xtable) # version 1.8-4
require(survival) # version 3.2-3
require(lubridate) # version 1.7.9

################
##
### data
##
################

# primary dataset

repdata_main <- read_csv("ungc_rep_data.csv")

# dataset for appendix model with MSCI ESG ratings

repdata_msci <- read_csv("msci_model_data.csv")

# dataset for multi-firm claims (claims with at least 4 named firms)
repdata_multi <- repdata_main %>%
  group_by(Summary) %>%
  filter(is.na(Summary) == F) %>% 
  mutate(firmsnamed = n()) %>%
  filter(firmsnamed >= 4) %>%
  ungroup()

## data for figure 3

figure3_data <- read_csv("figure3_data.csv")

################################################################
##
### Modeling & Regression Tables - Main Text
##
################################################################

##################################
## 2FE models - full sample
#
## Table 2
##################################

basemod1lm <- lm(response_dummy ~ UNGC_Member,
                 data = repdata_main) 
basemod1lm_r <-  coeftest(basemod1lm, vcovHC(basemod1lm, type = 'HC0', cluster = c('Company', 'Summary')))

basemod2lm <- lm(response_dummy ~ UNGC_Member + as.factor(naics2digit) + as.factor(Year),
                 data = repdata_main)
basemod2lm_r <- basemod2lm %>% 
  coeftest(vcovHC(basemod2lm, type = 'HC0', cluster = c('Company', 'Summary')))

basemod3lm <- lm(response_dummy ~ UNGC_Member + as.factor(Company) + as.factor(Year),
                 data = repdata_main)
basemod3lm_r <- basemod3lm %>% 
  coeftest(vcovHC(basemod3lm, type = 'HC0', cluster = c('Company', 'Summary')))

basemod4lm <- lm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                   v2csreprss_home + v2csreprss_host,
                 data = repdata_main)
basemod4lm_r <- basemod4lm %>% 
  coeftest(vcovHC(basemod4lm, type = 'HC0', cluster = c('Company', 'Summary')))

basemod5lm <- lm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                   v2csreprss_home + v2csreprss_host + as.factor(Year) + as.factor(naics2digit),
                 data = repdata_main)
basemod5lm_r <- basemod5lm %>% 
  coeftest(vcovHC(basemod5lm, type = 'HC0', cluster = c('Company', 'Summary')))

basemod6lm <- lm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                   v2csreprss_home + v2csreprss_host + as.factor(Year) + as.factor(Company),
                 data = repdata_main)
basemod6lm_r <- basemod6lm %>% 
  coeftest(vcovHC(basemod6lm, type = 'HC0', cluster = c('Company', 'Summary')))

## table 2 (to get estimates w/ cluster robust SEs)
stargazer(basemod1lm_r, basemod2lm_r, basemod3lm_r, basemod4lm_r,
          basemod5lm_r, basemod6lm_r, omit = c("as.factor"))

# table 2 (to get obs + r squared stats)
stargazer(basemod1lm, basemod2lm, basemod3lm, basemod4lm,
          basemod5lm, basemod6lm, omit = c("as.factor"))

##################################
## 2FE models - multi-firm claims
#
## Table 4
##################################

multimod1lm <- lm(response_dummy ~ UNGC_Member,
                    data = repdata_multi)
multimod1lm_r <- multimod1lm %>% 
  coeftest(vcovHC(multimod1lm, type = 'HC0', cluster = c('Company', 'Summary')))

multimod2lm <- lm(response_dummy ~ UNGC_Member + as.factor(naics2digit) + as.factor(Year),
                  data = repdata_multi)
multimod2lm_r <- multimod2lm %>% 
  coeftest(vcovHC(multimod2lm, type = 'HC0', cluster = c('Company', 'Summary')))

multimod3lm <- lm(response_dummy ~ UNGC_Member + as.factor(Summary) + as.factor(Year) +
                    as.factor(naics2digit),
                    data = repdata_multi)
multimod3lm_r <- multimod3lm %>% 
  coeftest(vcovHC(multimod3lm, type = 'HC0', cluster = c('Company', 'Summary')))

multimod4lm <- lm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                    v2csreprss_home + v2csreprss_host, data = repdata_multi)
multimod4lm_r <- multimod4lm %>% 
  coeftest(vcovHC(multimod4lm, type = 'HC0', cluster = c('Company', 'Summary')))

multimod5lm <- lm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                      v2csreprss_home + v2csreprss_host + as.factor(Year) +
                      as.factor(naics2digit), data = repdata_multi)
multimod5lm_r <- multimod5lm %>% 
  coeftest(vcovHC(multimod5lm, type = 'HC0', cluster = c('Company', 'Summary')))

multimod6lm <- lm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                      v2csreprss_home + v2csreprss_host + as.factor(Year) +
                      as.factor(Summary) + as.factor(naics2digit),
                    data = repdata_multi)
multimod6lm_r <- multimod6lm %>% 
  coeftest(vcovHC(multimod6lm, type = 'HC0', cluster = c('Company', 'Summary')))

## table 4 (to get estimates w/ cluster robust SEs)
stargazer(multimod1lm_r, multimod2lm_r, multimod3lm_r, multimod4lm_r,
          multimod5lm_r, multimod6lm_r, omit = c("as.factor"))

# table 4 (to get obs + r squared stats)
stargazer(multimod1lm, multimod2lm, multimod3lm, multimod4lm,
          multimod5lm, multimod6lm, omit = c("as.factor"))

##################################
## IV models - full sample
#
## Table 5
##################################

ivmod1 <- ivreg(response_dummy ~ UNGC_Member| sector_avg + country_avg, data = repdata_main)

ivmod2 <- ivreg(response_dummy ~ UNGC_Member + prior_claims + log_total_assets|
                  sector_avg + country_avg + prior_claims + log_total_assets, data = repdata_main)

ivmod3 <- ivreg(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host |
                  sector_avg + country_avg + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host, data = repdata_main)

ivmod4 <- ivreg(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host + as.factor(Year) |
                  sector_avg + country_avg + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host + as.factor(Year), data = repdata_main)

ivmod5 <- ivreg(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host + as.factor(naics2digit) |
                  sector_avg + country_avg + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host + as.factor(naics2digit), data = repdata_main)

ivmod6 <- ivreg(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host + as.factor(Year) + as.factor(naics2digit)|
                  sector_avg + country_avg + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host + as.factor(Year) + as.factor(naics2digit),
                data = repdata_main)

stargazer(ivmod1, ivmod2, ivmod3, ivmod4, ivmod5, ivmod6,
          omit = c("as.factor"))

# Note: the first stage F-stats and Hausman statistics reported in
# Table 5 were acquired as follows:

summary(ivmod1, diagnostics = T)

summary(ivmod2, diagnostics = T)

summary(ivmod3, diagnostics = T)

summary(ivmod4, diagnostics = T)

summary(ivmod5, diagnostics = T)

summary(ivmod6, diagnostics = T)

################################################################
##
### Modeling - Appendix
##
################################################################

##################################
## Logit models - full sample
#
## Appendix Table 1
##################################

basemod1glm <- glm(response_dummy ~ UNGC_Member, family = binomial(link = "logit"),
                   data = repdata_main)
basemod1glm_r <- basemod1glm %>% 
  coeftest(vcovHC(basemod1glm, type = 'HC0', cluster = c('Company', 'Summary')))

basemod2glm <- glm(response_dummy ~ UNGC_Member + as.factor(naics2digit) + as.factor(Year),
                family = binomial(link = "logit"), data = repdata_main)
basemod2glm_r <- basemod2glm %>% 
  coeftest(vcovHC(basemod2glm, type = 'HC0', cluster = c('Company', 'Summary')))

basemod3glm <- glm(response_dummy ~ UNGC_Member + as.factor(Company) + as.factor(Year),
                family = binomial(link = "logit"), data = repdata_main)
basemod3glm_r <- basemod3glm %>% 
  coeftest(vcovHC(basemod3glm, type = 'HC0', cluster = c('Company', 'Summary')))

basemod4glm <- glm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host,
                family = binomial(link = "logit"), data = repdata_main)
basemod4glm_r <- basemod4glm %>% 
  coeftest(vcovHC(basemod4glm, type = 'HC0', cluster = c('Company', 'Summary')))

basemod5glm <- glm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host + as.factor(Year) +
                  as.factor(naics2digit),
                family = binomial(link = "logit"), data = repdata_main)
basemod5glm_r <- basemod5glm %>% 
  coeftest(vcovHC(basemod5glm, type = 'HC0', cluster = c('Company', 'Summary')))

basemod6glm <- glm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host + as.factor(Year) + as.factor(Company),
                family = binomial(link = "logit"), data = repdata_main)
basemod6glm_r <- basemod6glm %>% 
  coeftest(vcovHC(basemod6glm, type = 'HC0', cluster = c('Company', 'Summary')))

## appendix table 1 (to get estimates w/ cluster robust SEs)
stargazer(basemod1glm_r, basemod2glm_r, basemod3glm_r, basemod4glm_r,
          basemod5glm_r, basemod6glm_r, omit = c("as.factor"))

# appendix table 1 (to get obs + fit stats)
stargazer(basemod1glm, basemod2glm, basemod3glm, basemod4glm,
          basemod5glm, basemod6glm, omit = c("as.factor"))

##################################
## Logit models - multi-firm claims
#
## Appendix Table 2
##################################

multimod1glm <- glm(response_dummy ~ UNGC_Member,
                  data = repdata_multi, family = binomial(link = "logit"))
multimod1glm_r <- multimod1glm %>% 
  coeftest(vcovHC(multimod1glm, type = 'HC0', cluster = c('Company', 'Summary')))

multimod2glm <- glm(response_dummy ~ UNGC_Member + as.factor(naics2digit) + as.factor(Year),
                  data = repdata_multi, family = binomial(link = "logit"))
multimod2glm_r <- multimod2glm %>% 
  coeftest(vcovHC(multimod2glm, type = 'HC0', cluster = c('Company', 'Summary')))

multimod3glm <- glm(response_dummy ~ UNGC_Member + as.factor(Summary) + as.factor(Year) +
                      as.factor(naics2digit),
                  data = repdata_multi, family = binomial(link = "logit"))
multimod3glm_r <- multimod3glm %>% 
  coeftest(vcovHC(multimod3glm, type = 'HC0', cluster = c('Company', 'Summary')))

multimod4glm <- glm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                    v2csreprss_home + v2csreprss_host,
                  data = repdata_multi, family = binomial(link = "logit"))
multimod4glm_r <- multimod4glm %>% 
  coeftest(vcovHC(multimod4glm, type = 'HC0', cluster = c('Company', 'Summary')))

multimod5glm <- glm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                    v2csreprss_home + v2csreprss_host + as.factor(Summary) +
                    as.factor(Year),
                  data = repdata_multi, family = binomial(link = "logit"))
multimod5glm_r <- multimod5glm %>% 
  coeftest(vcovHC(multimod5glm, type = 'HC0', cluster = c('Company', 'Summary')))

multimod6glm <- glm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                      v2csreprss_home + v2csreprss_host + as.factor(Year) +
                      as.factor(naics2digit),
                  data = repdata_multi, family = binomial(link = "logit"))
multimod6glm_r <- multimod6glm %>% 
  coeftest(vcovHC(multimod6glm, type = 'HC0', cluster = c('Company', 'Summary')))

## appendix table 2 (to get estimates w/ cluster robust SEs)
stargazer(multimod1glm_r, multimod2glm_r, multimod3glm_r, multimod4glm_r,
          multimod5glm_r, multimod6glm_r, omit = c("as.factor"))

# appendix table 2 (to get obs + fit stats)
stargazer(basemod1glm, basemod2glm, basemod3glm, basemod4glm,
          basemod5glm, basemod6glm, omit = c("as.factor"))

##################################
## Conditional logit (clogit) models - full sample & multi-claim
#
## Appendix Table 3
##################################

basemod4clogit <- clogit(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                            v2csreprss_home + v2csreprss_host + strata(Company),
                          data = repdata_main)

basemod5clogit <- clogit(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                            v2csreprss_home + v2csreprss_host + as.factor(Year) + strata(Company),
                          data = repdata_main)

multimod4clogit <- clogit(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                              v2csreprss_home + v2csreprss_host + strata(Summary), data = repdata_multi)

multimod5clogit <- clogit(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                              v2csreprss_home + v2csreprss_host + strata(Year) +
                              strata(Summary), data = repdata_multi)

multimod6clogit <- clogit(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                              v2csreprss_home + v2csreprss_host + strata(Year) +
                              strata(Summary) + strata(naics2digit), data = repdata_multi)

# appendix table 3
stargazer(basemod4clogit, basemod5clogit, multimod4clogit,
          multimod5clogit, multimod6clogit, omit = c("as.factor"))

##################################
## Logit models - w/MSCI ESG ratings
#
## Appendix Table 4
##################################

mscimod1 <- glm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                  HUM_str_X, family = binomial(link = "logit"), data = repdata_msci)
mscimod1_r <- mscimod1 %>% 
  coeftest(vcovHC(mscimod1, type = 'HC0', cluster = c('Company', 'Summary')))

mscimod2 <- glm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                  HUM_str_X + v2csreprss_home + v2csreprss_host,
                family = binomial(link = "logit"), data = repdata_msci)
mscimod2_r <- mscimod2 %>% 
  coeftest(vcovHC(mscimod2, type = 'HC0', cluster = c('Company', 'Summary')))

mscimod3 <- glm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                  HUM_str_X + v2csreprss_home + v2csreprss_host +
                  as.factor(Year),
                family = binomial(link = "logit"), data = repdata_msci)
mscimod3_r <- mscimod3 %>% 
  coeftest(vcovHC(mscimod3, type = 'HC0', cluster = c('Company', 'Summary')))

mscimod4 <- glm(response_dummy ~ UNGC_Member + prior_claims + log_total_assets +
                  HUM_str_X + v2csreprss_home + v2csreprss_host +
                  as.factor(Year) + as.factor(naics2digit),
                family = binomial(link = "logit"), data = repdata_msci)
mscimod4_r <- mscimod4 %>% 
  coeftest(vcovHC(mscimod4, type = 'HC0', cluster = c('Company', 'Summary')))

## appendix table 4 (to get estimates w/ cluster robust SEs)
stargazer(mscimod1_r, mscimod2_r, mscimod3_r, mscimod4_r,
          omit = c("as.factor"))

# appendix table 4 (to get obs + fit stats)
stargazer(mscimod1, mscimod2, mscimod3, mscimod4,
          omit = c("as.factor"))

##################################
## First stage of IV models
#
## Appendix Table 5
##################################

ivmod1_firststage <- ivreg(UNGC_Member ~ sector_avg + country_avg, data = repdata_main)

ivmod2_firststage <- ivreg(UNGC_Member ~
                  sector_avg + country_avg + prior_claims + log_total_assets, data = repdata_main)

ivmod3_firststage <- ivreg(UNGC_Member ~
                  sector_avg + country_avg + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host, data = repdata_main)

ivmod4_firststage <- ivreg(UNGC_Member~
                  sector_avg + country_avg + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host + as.factor(Year), data = repdata_main)

ivmod5_firststage <- ivreg(UNGC_Member~
                  sector_avg + country_avg + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host + as.factor(naics2digit), data = repdata_main)

ivmod6_firststage <- ivreg(UNGC_Member~
                  sector_avg + country_avg + prior_claims + log_total_assets +
                  v2csreprss_home + v2csreprss_host + as.factor(Year) + as.factor(naics2digit),
                data = repdata_main)

## appendix table 5
stargazer(ivmod1_firststage, ivmod2_firststage, ivmod3_firststage,
          ivmod4_firststage, ivmod5_firststage, ivmod6_firststage,
          omit = c("as.factor", "Constant"))

################################################################
##
### Figures + Descriptive Tables
##
################################################################

## Figure 1: UNGC membership over time

ungc_membership <- read_csv("ungcmemberdata.csv")

ungc_membership <- ungc_membership %>% 
  mutate(joinyear = year(joineddate)) %>% 
  filter(firmtype == "Company" | firmtype == "Small or Medium-sized Enterprise" |
           firmtype == "Business Association Local" | firmtype == "Business Association Global")

ungc_mem_plot <- ungc_membership %>% 
  group_by(joinyear) %>% 
  summarize(newjoins = n()) %>% 
  mutate(MemberFirms = cumsum(newjoins)) %>% 
  ggplot(aes(x = joinyear, y = MemberFirms)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Member Firms") +
  theme_bw()

## Figure 2: UNGC Membership by industry

ungc_by_sector <- repdata_main %>% 
  filter(is.na(naics_name) == F) %>% 
  group_by(naics_name) %>% 
  summarize(n = n(), ungc = mean(UNGC_Member, na.rm = T)) %>%
  ggplot(aes(x = fct_reorder(as.factor(naics_name), ungc, .desc = T),
             y = ungc, fill = n)) +
  geom_bar(stat = "identity") +
  scale_fill_gradient(low = "grey", high = "black") +
  geom_hline(yintercept = .5, linetype = "dashed", alpha = .8) +
  coord_flip() +
  labs(x = "", y = "Proportion UNGC Members", fill = "# of observations") +
  theme_bw()

ggsave("ungc_by_sector.png", ungc_by_sector, width = 8, height = 4, units = "in")

## Table 1: Descriptive stats, overall and
## by UNGC membership status

# get full-sample summary stats
desc_stats_overall <- repdata_main %>% 
  skim() %>% 
  slice(-1:-5, -14:-16) %>% 
  select(Variable = skim_variable, Missing = n_missing,
         Mean = numeric.mean, SD = numeric.sd, Min = numeric.p0,
         Max = numeric.p100) %>% 
  xtable()

# get LaTeX code using xtable

print.xtable(desc_stats_overall, include.rownames = F)

# get summary stats by ungc membership status

desc_stats_ungc <- repdata_main %>% 
  filter(UNGC_Member == 1) %>% 
  skim() %>% 
  select(Variable = skim_variable, Missing = n_missing,
         Mean = numeric.mean, SD = numeric.sd) %>% 
  slice(-1:-6, -8, -9, -14:-16)

desc_stats_nonungc <- repdata_main %>% 
  filter(UNGC_Member == 0) %>% 
  skim() %>% 
  select(Variable = skim_variable, Missing = n_missing,
         Mean = numeric.mean, SD = numeric.sd) %>% 
  slice(-1:-6, -8, -9, -14:-16)

desc_stats <- desc_stats_ungc %>% 
  bind_cols(desc_stats_nonungc) %>% 
  select(-Variable...5) %>% 
  mutate(Diff = Mean...3 - Mean...7) %>% 
  xtable()

# get LaTeX code using xtable

print.xtable(desc_stats, include.rownames = F)

# do diff-in-means tests to get significance levels for table
repdata_ungc <- repdata_main %>% filter(UNGC_Member == 1)

repdata_nonungc <- repdata_main %>% filter(UNGC_Member == 0)

t.test(repdata_ungc$Year, repdata_nonungc$Year,
       alternative = "two.sided") # p < .001

t.test(repdata_ungc$prior_claims, repdata_nonungc$prior_claims,
       alternative = "two.sided") # p < .001

t.test(repdata_ungc$log_total_assets, repdata_nonungc$log_total_assets,
       alternative = "two.sided") # p < .001

t.test(repdata_ungc$v2csreprss_home, repdata_nonungc$v2csreprss_home,
       alternative = "two.sided") # p < .001

t.test(repdata_ungc$v2csreprss_host, repdata_nonungc$v2csreprss_host,
       alternative = "two.sided") # p < .001

t.test(repdata_ungc$v2csreprss_host, repdata_nonungc$v2csreprss_host,
       alternative = "two.sided") # p < .001


## Table 3: 10 largest multi-firm claims, response rate by
## UNGC member status. Note: I made this table manually,
## but this code identifies the 10 largest claims as well
## as the response rates for UNGC members and non-members.

repdata_multi %>% 
  group_by(Summary, UNGC_Member) %>% 
  summarize(nfirmstotal = mean(firmsnamed),
            nfirms = n(),
            res_rate = mean(response_dummy)) %>%
  ungroup %>% 
  arrange(desc(nfirmstotal)) %>% 
  slice(1:20) %>% 
  View()

## Figure 3: UNGC uptake by country

# find 12 states with the most firms on the global 2000 list

figure3_data %>% 
  group_by(Country) %>% 
  summarize(n = n()/19) %>% 
  arrange(desc(n)) %>% 
  slice(1:12)

# calculate membership trajectory for top 12 states

membership_by_country_data <- figure3_data %>%
  filter(Country %in% c("United States", "Japan", "China", "United Kingdom",
                        "France", "South Korea", "Hong Kong", "India",
                        "Canada", "Germany", "Switzerland", "Taiwan")) %>% 
  group_by(Country, Year) %>% 
  summarize(members = sum(ungc_member),
            prop_members = sum(ungc_member)/n())
 
membership_by_country_plot <- membership_by_country_data %>% 
  ggplot(aes(x = Year, y = prop_members, group = Country,
             label = Country)) +
  geom_line() +
  facet_wrap(vars(Country)) +
  labs(x = "", y = "Proportion UNGC Members") +
  theme_bw()

 ggsave("membership_by_country.png", membership_by_country_plot,
       width = 8, height = 5, units = "in")

################################################################
##
### Statistics referenced in the text
##
################################################################

## page 21 (footnote): "For comparison, the average firm receives
## 3.62 claims."

meanclaims <- repdata_main %>% group_by(Company) %>% summarize(n = n())
mean(meanclaims$n)

## page 23: "and four sectors have no UNGC member firms (though these four 
## sectors represent only 27 observations)."
repdata_main %>% 
  filter(is.na(naics_name) == F) %>% 
  group_by(naics_name) %>% 
  summarize(n = n(), ungc = mean(UNGC_Member, na.rm = T)) %>%
  filter(ungc == 0) %>% 
  select(n) %>% 
  sum()

## page 23: "979 of the 1,515 firm-claims in the dataset are associated 
## with a sector that has a UNGC membership rate between 40% and 60%"
repdata_main %>% 
  filter(is.na(naics_name) == F) %>% 
  group_by(naics_name) %>% 
  summarize(n = n(), ungc = mean(UNGC_Member, na.rm = T)) %>%
  filter(ungc >= .4 & ungc <= .6) %>% 
  select(n) %>% 
  sum()

